In this first chunk, let’s read in the Census microdata. Here is some example code on how to read in the data, create new variables to categorize the rows of data into groups, and then summarize the data to create information about Louisville.
Our goal is to create variables for gender, age group, whether someone is a mother, whether someone is married, their level of education, their income, whether they are the head of household, and the number of children they have.
This code chunk will identify which households are homeowners vs. renters (in the homeownership variable) and which households are cost-burdened, meaning they pay more than 30% of their income toward rent or a mortgage (in the cost_burden variable).
There are also variables for severe cost burden (households that pay more than half of their income towards housing) and households with severe housing problems (lacking a kitchen, adequate plumbing, or an ample number of rooms for the number of people living there).
#Waffle Chart
# Summarise `homeownership` from the full microdata, weighted by "HHWT".
# Keep only the percentage rows for all races combined, and drop the pooled
# sex == "total" row so the remaining rows are split by sex.
H_gen_trend <- census_microdata081122 %>%
  survey_by_demog(weight_var = "HHWT", "homeownership") %>%
  filter(var_type == "percent" & race == "total" & sex != "total")

# Plot the homeownership column over time, categorised by sex.
trend(
  H_gen_trend,
  homeownership,
  plot_title = "Homeownership by Year",
  cat = "sex",
  y_title = "Percent",
  caption_text = "Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA"
)
# Homeownership among single-earner records in 2019, for the ranking chart.
# The microdata is subset first, then summarised (weighted by "HHWT"), and
# finally reduced to the pooled (sex and race both "total") percentage rows.
# NOTE(review): the object name suggests "single female", but no
# sex == "female" filter is applied here -- confirm this is intended.
H_singFem_rank <- census_microdata081122 %>%
  filter(year == "2019") %>%
  filter(earner_type == "single_earner") %>%
  survey_by_demog(weight_var = "HHWT", "homeownership") %>%
  filter(var_type == "percent", race == "total", sex == "total")

ranking(H_singFem_rank, "homeownership")
# Same ranking as the single-earner chart, restricted further to records with
# at least one child (NCHILD > 0) in 2019.
# NOTE(review): as with the single-earner ranking, the name says "Fem" but the
# data is not filtered to sex == "female" -- confirm this is intended.
H_sinFem_kids <- census_microdata081122 %>%
  filter(year == "2019" & earner_type == "single_earner" & NCHILD > 0) %>%
  survey_by_demog(weight_var = "HHWT", "homeownership") %>%
  filter(var_type == "percent") %>%
  filter(race == "total", sex == "total")

ranking(H_sinFem_kids, "homeownership")
# Homeownership summarised with `kd_pres` as an extra grouping variable.
# Steps, in order:
#   1. keep percentage rows for all races combined, split by sex;
#   2. spread the `kd_pres` categories into their own columns
#      (the trend() call below expects `kids` and `no_kids`);
#   3. keep the female rows only and drop the now-constant `sex` column.
H_s_m_kids_trend <- census_microdata081122 %>%
  survey_by_demog(
    weight_var = "HHWT",
    "homeownership",
    other_grouping_vars = c("kd_pres")
  ) %>%
  filter(var_type == "percent", race == "total", sex != "total") %>%
  pivot_wider(names_from = "kd_pres", values_from = "homeownership") %>%
  filter(sex == "female") %>%
  select(-sex)

# One trend line per kids-present category.
trend(
  H_s_m_kids_trend,
  kids:no_kids,
  plot_title = "Female With Kids Homeownership by Year",
  cat = c("Kids Present" = "kids", "No Kids Present" = "no_kids"),
  y_title = "Percent",
  caption_text = "Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA"
)
# Homeownership percentages split by race, pooled across sexes
# (sex == "total"), summarised from the full microdata weighted by "HHWT".
# NOTE(review): the object name suggests a female-only series, but the filter
# keeps sex == "total". If a female-only chart was intended, change the filter
# to sex == "female" and adjust the title accordingly.
H_Fem_race <- census_microdata081122 %>%
  survey_by_demog(weight_var = "HHWT", "homeownership") %>%
  filter(var_type == "percent", race != "total", sex == "total")

# The previous title ("Female With Kids Homeownership by Year") was copied
# from the kids-present chart and did not describe this data, which has no
# kids dimension and is not restricted to women.
trend(
  H_Fem_race,
  homeownership,
  plot_title = "Homeownership by Race and Year",
  cat = "race",
  y_title = "Percent",
  caption_text = "Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA"
)
#Income
#################KEEP################
## create a histogram for each of the three most recent years -> just to verify that there isn't anything weird
## if feeling funky, use gganimate to create a gif of histograms from 2000 to 2019
# Income histogram overlaid by sex. Rows with HHINCOME above `cut_95` are
# dropped first (presumably a 95th-percentile cutoff defined elsewhere --
# confirm).
lville_2019 %>%
  filter(HHINCOME <= cut_95) %>%
  func_plt_hist_overlay("sex")

# Same overlay, by race, for female single-earner records only.
lville_2019 %>%
  filter(sex == "female", earner_type == "single_earner") %>%
  filter(HHINCOME <= cut_95) %>%
  func_plt_hist_overlay("race")
## switch legend
# Stacked income histogram for female single-earner records, filled by
# cost-burden status. Rows with HHINCOME above `cut_95` are excluded.
# `cost_burden` is recoded to an ordered factor with FALSE first so the legend
# reads "Non Cost Burdened" then "Cost Burdened".
#
# The outline colour, alpha, position and binwidth are layer settings, so they
# are passed to geom_histogram(). They were previously given to ggplot(), which
# ignores them, so the histogram fell back to the default 30 bins and the
# styling was never applied.
lville_2019 %>%
  filter(
    sex == "female",
    earner_type == "single_earner",
    HHINCOME <= cut_95
  ) %>%
  mutate(
    cost_burden = factor(
      cost_burden,
      levels = c(FALSE, TRUE),
      labels = c("Non Cost Burdened", "Cost Burdened"),
      ordered = TRUE
    )
  ) %>%
  ggplot(aes(x = HHINCOME, fill = cost_burden)) +
  geom_histogram(
    color = "#00A9B7",
    alpha = 0.5,
    position = "stack",
    binwidth = 10000
  )
# Cost-burden percentages with `earner_type` as an extra grouping variable,
# summarised from the full microdata weighted by "HHWT". Percentage rows for
# all races combined are kept, and the earner types are spread into their own
# columns (the trend() call below expects `multi_earner` and `single_earner`).
I_CB_earn_trend <- census_microdata081122 %>%
  survey_by_demog(
    weight_var = "HHWT",
    "cost_burden",
    other_grouping_vars = c("earner_type")
  ) %>%
  filter(var_type == "percent", race == "total", sex != "total") %>%
  pivot_wider(names_from = "earner_type", values_from = "cost_burden")

## Ask harrison about this
# NOTE(review): `sex` is dropped here without first choosing one sex, so the
# rows for each sex that passed the sex != "total" filter remain, now
# unlabelled. The kids-present chart filters to sex == "female" before dropping
# the column; confirm whether the same (or sex == "total" in the filter above)
# is wanted here.
I_CB_earn_trend <- I_CB_earn_trend %>%
  select(-sex)

# The title previously read "Homeownership by Year", copied from the
# homeownership charts; this plot shows cost burden.
trend(
  I_CB_earn_trend,
  multi_earner:single_earner,
  plot_title = "Cost Burden by Year",
  cat = c("Multi Earner" = "multi_earner", "Single Earner" = "single_earner"),
  y_title = "Percent",
  caption_text = "Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA"
)
# H_s_m_kids_trend %<>% filter(sex == "female") %>% select(-sex)
#
# trend(H_s_m_kids_trend, kids:no_kids, plot_title = "Female With Kids Homeownership by Year",
# cat = c("Kids Present" = "kids", "No Kids Present" = "no_kids"), y_title = 'Percent',
# caption_text = "Source Greater Louisville Project
# Data from GLP analysis of ACS microdata from IPUMS-USA"
# )
# Education mix of single-earner records, by sex.
# `n` is the sum of HHWT within each sex/educ cell. `total` and `rate` are then
# computed within each sex, so `rate` is the percentage of that sex's weighted
# total falling in each education level.
E_singM_singF <- lville_2019 %>%
  filter(earner_type == "single_earner") %>%
  group_by(sex, educ) %>%
  # Keep the `sex` grouping explicitly (rather than relying on summarize()'s
  # implicit default) so the totals below are per sex.
  summarize(n = sum(HHWT, na.rm = TRUE), .groups = "drop_last") %>%
  mutate(
    total = sum(n),
    rate = n / total * 100,
    educ = factor(
      educ,
      levels = rev(c("no_hs", "hs", "some_col", "assoc", "bach", "grad")),
      ordered = TRUE
    )
  ) %>%
  # Return an ungrouped table so later operations are not silently per-sex.
  ungroup()

# Stacked bars rescaled to fill the full height: one bar per sex, segments by
# education level.
ggplot(E_singM_singF, aes(x = sex, y = rate, fill = educ)) +
  geom_col(position = "fill")
# Education mix of female single-earner records, by race.
# `n` is the sum of HHWT within each race/educ cell. `total` and `rate` are
# then computed within each race, so `rate` is the percentage of that race's
# weighted total falling in each education level.
E_singF_race <- lville_2019 %>%
  filter(sex == "female", earner_type == "single_earner") %>%
  group_by(race, educ) %>%
  # Keep the `race` grouping explicitly (rather than relying on summarize()'s
  # implicit default) so the totals below are per race.
  summarize(n = sum(HHWT, na.rm = TRUE), .groups = "drop_last") %>%
  mutate(
    total = sum(n),
    rate = n / total * 100,
    educ = factor(
      educ,
      levels = rev(c("no_hs", "hs", "some_col", "assoc", "bach", "grad")),
      ordered = TRUE
    )
  ) %>%
  # Return an ungrouped table so later operations are not silently per-race.
  ungroup()

# Stacked bars rescaled to fill the full height: one bar per race, segments by
# education level.
ggplot(E_singF_race, aes(x = race, y = rate, fill = educ)) +
  geom_col(position = "fill")